1) data processing
## preprocessing
df = read.csv("merged_interpolated.csv")
df <- df[,c('datetime', 'tempc', 'net_load')]
names(df) <- c('dt', 'temp', 'demand')
merged_interpolated.csv [datetime]: ACST - Australia Central
Standard Time
as.numeric: transform Australia h/min/s => UTC time 00:00:00
=> number
Simon: first postpone dt 23:30 + 0930 saved as df$ACDT
Sys.setenv(TZ = "Australia/South")
# df$ACDT <- as.POSIXct(df$dt, format="%F %T %z", tz="Australia/South")
df$ACDT <- as.POSIXct(paste(df$dt, "+0930"), format="%F %T %z", tz="Australia/South")
# df$ACDT2 <- as.POSIXct(df$dt, format="%Y-%m-%d %H:%M:%S")
## Australian Central Daylight Time
df$isDST <- as.POSIXlt(df$ACDT)$isdst > 0 ## 是否实行夏令时
df$ES <- fasttime::fastPOSIXct(df$dt,tz="Australia/South")
df$ES <- fasttime::fastPOSIXct(df$dt)
df$new_ES = as.POSIXct(df$dt, format="%Y-%m-%d %H:%M:%S", tz="Australia/South")
## 实行夏令时之前的时间,ACDT+9:30
# df$ED <- df$ES - df$isDST * 3600 ## adjust for DST
df$new_ED = df$new_ES - df$isDST * 3600
df$new_ED <- as.POSIXct(df$new_ED, format="%Y-%m-%d %H:%M:%S")
# df$hr = format(df$ED, "%H")
df[1:5,]
## dt temp demand ACDT isDST
## 1 2018-03-06 09:30:00 20.75 1288 2018-03-06 10:30:00 TRUE
## 2 2018-03-06 10:00:00 21.50 1237 2018-03-06 11:00:00 TRUE
## 3 2018-03-06 10:30:00 22.25 1189 2018-03-06 11:30:00 TRUE
## 4 2018-03-06 11:00:00 23.00 1150 2018-03-06 12:00:00 TRUE
## 5 2018-03-06 11:30:00 23.55 1122 2018-03-06 12:30:00 TRUE
## ES new_ES new_ED
## 1 2018-03-06 20:00:00 2018-03-06 09:30:00 2018-03-06 08:30:00
## 2 2018-03-06 20:30:00 2018-03-06 10:00:00 2018-03-06 09:00:00
## 3 2018-03-06 21:00:00 2018-03-06 10:30:00 2018-03-06 09:30:00
## 4 2018-03-06 21:30:00 2018-03-06 11:00:00 2018-03-06 10:00:00
## 5 2018-03-06 22:00:00 2018-03-06 11:30:00 2018-03-06 10:30:00
df$dst = (as.numeric(df$new_ES) %% 86400) / 86400
# chase = as.POSIXct(df$dt, format="%F %T", tz="UTC")
# new_chase <- as.POSIXct(paste(chase, "+0930"), format="%F %T %z", tz="Australia/South")
# new2_chase = fasttime::fastPOSIXct(new_chase)
## some extra columns ...
d = df
d$yfrac = as.POSIXlt(d$ES)$yday/365
d$year = as.POSIXlt(d$ES)$year+1900
d$dow = (as.POSIXlt(d$ES)$wday + 1) %% 7
Holiday issues:
# d$time = (as.numeric(d$ES) %% 86400) / 86400
# d$dst = (as.numeric(d$ED) %% 86400) / 86400
# d$day = as.numeric(d$ES) %/% 86400
# d$dayd = as.numeric(d$ED) %/% 86400
#chase
df = read.csv("merged_interpolated.csv")
df2 <- df[,c('datetime', 'tempc', 'net_load')]
df2$acst <- as.POSIXct(df2$datetime, format="%F %T", tz="UTC")
df2$acdt <- as.POSIXct(paste(df2$datetime, "+1030"), format="%F %T %z", tz="Australia/South")
head(df2)
## datetime tempc net_load acst acdt
## 1 2018-03-06 09:30:00 20.75 1288 2018-03-06 09:30:00 2018-03-06 09:30:00
## 2 2018-03-06 10:00:00 21.50 1237 2018-03-06 10:00:00 2018-03-06 10:00:00
## 3 2018-03-06 10:30:00 22.25 1189 2018-03-06 10:30:00 2018-03-06 10:30:00
## 4 2018-03-06 11:00:00 23.00 1150 2018-03-06 11:00:00 2018-03-06 11:00:00
## 5 2018-03-06 11:30:00 23.55 1122 2018-03-06 11:30:00 2018-03-06 11:30:00
## 6 2018-03-06 12:00:00 24.10 1102 2018-03-06 12:00:00 2018-03-06 12:00:00
Sys.time()
## [1] "2023-05-05 08:55:35 ACST"
unclass(Sys.time())
## [1] 1683242736
# as.character(.POSIXct(runif(1e4) * unclass(Sys.time()), "GMT"))
## preprocessing
library(lubridate)
## Warning: 程辑包'lubridate'是用R版本4.2.3 来建造的
##
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
df = read.csv("merged_interpolated.csv")
df <- df[,c('datetime', 'tempc', 'net_load')]
names(df) <- c('dt', 'temp', 'demand')
df$ACDT <- as.POSIXct(df$dt, format="%Y-%m-%d %H:%M:%S")
df$isDST <- as.POSIXlt(df$ACDT)$isdst > 0
df$ED = df$ACDT - df$isDST * 3600
df$year = as.numeric(format(df$ED, "%Y"))
df$month = as.numeric(format(df$ED," %m"))
df$hour = as.numeric(format(df$ED, "%H"))
df$minutes = as.numeric(format(df$ED, "%M"))
df$DST = (df$hour*60 + df$minutes) / 1440
df$yday = yday(df$ED)
df$yfrac = df$yday/365
df$dw <- wday(df$ED, week_start = 1)
df_weekday = df[df$dw>=1 & df$dw<=5,]
# Load the ggplot2 package
library(ggplot2)
df_weekday2022 = df_weekday[df_weekday$year %in% c(2018,2019,2020,2021,2022),]
# Calculate the mean demand for each year and hour combination
mean_demand <- aggregate(demand ~ year + hour, data = df_weekday2022, FUN = mean)
# Plot a line graph showing the average demand by year and hour
ggplot(mean_demand, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Hour of day") +
ylab("Average demand") +
ggtitle("Average demand by year and hour")

2018-2022 SA demand (by hour of day): (2023 not end yet) * overall
pattern: generally consistent shape, but peak & valleys differ a
lot. * morning peak shift to left; * night peak stay the same
# Load the ggplot2 package
library(ggplot2)
# Calculate the mean demand for each year, month, and hour combination
mean_demand <- aggregate(demand ~ year + month + hour, data = df_weekday2022, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
ggplot(mean_demand, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Hour of day") +
ylab("Average demand") +
ggtitle("Average demand by year, month, and hour") +
facet_wrap(~ month, ncol = 3) +
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"),
legend.text = element_text(size = rel(1)),
legend.title = element_text(size = rel(1.2)))

Drill into 12 months: the pattern still very similar among different
years, (Dec.: much lower in recent years)
# Load the ggplot2 package
library(ggplot2)
df_neat = df_weekday[df_weekday$year %in% c(2018,2022) & df_weekday$month %in% c(11,12), ]
# Calculate the mean demand for each year, month, and hour combination
mean_demand <- aggregate(demand ~ year + month + hour, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
ggplot(mean_demand, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Hour of day") +
ylab("Average demand") +
ggtitle("Average demand by year, month, and hour") +
facet_wrap(~ month, ncol = 1) +
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"),
legend.text = element_text(size = rel(1)),
legend.title = element_text(size = rel(1.2)))

guess?? https://www.roymorgan.com/findings/9091-solar-energy-systems-on-households-more-than-double-since-2018-now-at-nearly-a-third-of-all-households
New South Wales Solar panel rebate program was launched in July 2018
to lower solar panel costs for homes. This legislation promotes solar
panels for homes in all states. The solar rebate program was developed
by Environment Australia and the NSW government to increase renewable
energy systems.
library(ggplot2)
# Calculate the mean demand for each year, month, and hour combination
mean_demand <- aggregate(demand ~ year + month + temp, data = df_weekday, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
ggplot(mean_demand, aes(x = temp, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Temp") +
ylab("Average demand") +
ggtitle("Average demand by year, month, and Temp") +
facet_wrap(~ month, ncol = 4)

df_neat = df_weekday[df_weekday$year %in% c(2018,2022) & df_weekday$month %in% c(3,6,9), ]
mean_demand <- aggregate(demand ~ year + month + temp, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
ggplot(mean_demand, aes(x = temp, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Temp") +
ylab("Average demand") +
ggtitle("Average demand by year, month, and Temp") +
facet_wrap(~ month, ncol = 1)

library(gridExtra)
## Warning: 程辑包'gridExtra'是用R版本4.2.3 来建造的
data2014 = read.csv("original2014.csv")
tuned2014 = data2014[c("DescDate","DSTAscTime","Temp","Month","Demand")]
tuned2014$year = substr(tuned2014$DescDate,1,2)
tuned2014 =tuned2014[tuned2014$year==14,]
# tuned2014$hour = as.numeric(ifelse(grepl(":", tuned2014$DSTAscTime), substr(tuned2014$DSTAscTime, 1, ifelse(nchar(tuned2014$DSTAscTime) == 4, 2, 1)), NA))
tuned2014$hour <- strftime(strptime(tuned2014$DSTAscTime, format="%H:%M"), format="%H")
colnames(tuned2014)[c(3, 4,5)] <- c("temp", "month","demand")
# merge_years <- merge(tuned2014, df_weekday, by = c("year","temp","month","hour","demand"), all = TRUE)
#
# # ------------------------------
# df_neat = merge_years[merge_years$year %in% c(14,2018,2022) & merge_years$month %in% c(3,7,11), ]
#
# # Calculate the mean demand for each year, month, and hour combination
# mean_demand <- aggregate(demand ~ year + month + hour, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
final_tuned2014 = tuned2014[tuned2014$month %in% c(3), ]
mean_demand2014 <- aggregate(demand ~ year + month + hour, data = final_tuned2014, FUN = mean)
a= ggplot(mean_demand2014, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab(NULL) +
ylab("Average demand") +
ggtitle("Average demand in month3 in different hours") +
facet_wrap(~ month, ncol = 1) +
scale_color_manual(values = c("red", "blue", "green"),
name = "Year",
labels = c("2014-NSW", "2018-SA", "2022-SA"))+
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"))
df_neat = df_weekday[df_weekday$year %in% c(2018,2022) & df_weekday$month %in% c(3), ]
# Calculate the mean demand for each year, month, and hour combination
mean_demand <- aggregate(demand ~ year + month + hour, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
b = ggplot(mean_demand, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Hour of day") +
ylab("Average demand") +
facet_wrap(~ month, ncol = 1) +
scale_color_manual(values = c("red", "blue"),
name = "Year",
labels = c("2018-SA", "2022-SA"))+
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"),
legend.text = element_text(size = rel(1)),
legend.title = element_text(size = rel(1.2)))
grid.arrange(a, b, ncol = 1)

Explains our lower R-sqaure for SA 2018-2022
library(gridExtra)
data2014 = read.csv("original2014.csv")
tuned2014 = data2014[c("DescDate","DSTAscTime","Temp","Month","Demand")]
tuned2014$year = substr(tuned2014$DescDate,1,2)
tuned2014 =tuned2014[tuned2014$year==14,]
# tuned2014$hour = as.numeric(ifelse(grepl(":", tuned2014$DSTAscTime), substr(tuned2014$DSTAscTime, 1, ifelse(nchar(tuned2014$DSTAscTime) == 4, 2, 1)), NA))
tuned2014$hour <- strftime(strptime(tuned2014$DSTAscTime, format="%H:%M"), format="%H")
colnames(tuned2014)[c(3, 4,5)] <- c("temp", "month","demand")
# merge_years <- merge(tuned2014, df_weekday, by = c("year","temp","month","hour","demand"), all = TRUE)
#
# # ------------------------------
# df_neat = merge_years[merge_years$year %in% c(14,2018,2022) & merge_years$month %in% c(3,7,11), ]
#
# # Calculate the mean demand for each year, month, and hour combination
# mean_demand <- aggregate(demand ~ year + month + hour, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
final_tuned2014 = tuned2014[tuned2014$month %in% c(11), ]
mean_demand2014 <- aggregate(demand ~ year + month + hour, data = final_tuned2014, FUN = mean)
a= ggplot(mean_demand2014, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab(NULL) +
ylab("Average demand") +
ggtitle("Average demand in month3 in different hours") +
facet_wrap(~ month, ncol = 1) +
scale_color_manual(values = c("red", "blue", "green"),
name = "Year",
labels = c("2014-NSW", "2018-SA", "2022-SA"))+
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"))
df_neat = df_weekday[df_weekday$year %in% c(2018,2022) & df_weekday$month %in% c(11), ]
# Calculate the mean demand for each year, month, and hour combination
mean_demand <- aggregate(demand ~ year + month + hour, data = df_neat, FUN = mean)
# Plot a line graph showing the average demand by year, month, and hour, split by month
b = ggplot(mean_demand, aes(x = hour, y = demand, group = year, color = as.factor(year))) +
geom_line() +
xlab("Hour of day") +
ylab("Average demand") +
facet_wrap(~ month, ncol = 1) +
scale_color_manual(values = c("red", "blue"),
name = "Year",
labels = c("2018-SA", "2022-SA"))+
theme(plot.title = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1)),
strip.text = element_text(size = rel(1.2)),
strip.background = element_rect(fill = "white"),
legend.text = element_text(size = rel(1)),
legend.title = element_text(size = rel(1.2)))
grid.arrange(a, b, ncol = 1)
